################################################################################
##
## Purpose: This script creates all figures and tables from SI section 1
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
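## Input Files:
##  - ./data/prepped/finalData.RData: Prepped data from 9_DATA_final_build.R
##  - ./data/prepped/hearings/cleaned_docs.csv: Hearing transcript text used
##      for SI figures 2 and 3
##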
## Output Files:
##  - ./output/tables/SI_table_1.tex
##  - ./output/tables/SI_table_2.tex
##  - ./output/figures/SI_figure_2.pdf
##  - ./output/figures/SI_figure_3.pdf
##
##
## See associated log file for compute environment, package versions, 
##  and date of most recent run.
##
################################################################################
# Start from an empty global environment so objects left over from an earlier
# session cannot be picked up by the code below.
rm(list = ls())
gc()

# Attach dependencies with library() rather than require(): require() only
# returns FALSE (with a warning) when a package is missing, so the script would
# keep running and fail later at the first call to a function it cannot find.
library(tidyverse)
library(ggridges)
library(ggrepel)
library(marginaleffects)
library(vtable)

# Fixed seed for reproducibility.
set.seed(123)

# Compute details: write a best-effort description of the machine and the R
# session to the log. Everything is print()ed explicitly so the information is
# also recorded when the script is run through source(), where top-level
# values are not auto-printed.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))
os_name <- Sys.info()[['sysname']]
if (os_name == 'Windows') {
  # The first element of each wmic result is the column header.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # Sys.info() reports 'Linux' here; the previous comparison against 'Linuxs'
  # could never be TRUE, so this branch was unreachable.
  #
  # The logging must not abort the analysis: if `ps` is unavailable the error
  # is reported as a message and `df` is left NULL.
  df <- tryCatch({
    ps_lines <- system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE)
    # Drop the header row, then split on runs of blanks so the fields are found
    # regardless of how wide ps pads each column.
    fields <- strsplit(trimws(ps_lines[-1]), " +")
    do.call(rbind, lapply(fields,
                          function(x) data.frame(
                            cpu = as.numeric(x[1]),
                            mem = as.numeric(x[2]),
                            pid = as.numeric(x[3]),
                            cmd = paste(x[-(1:3)], collapse = " "))))
  }, error = function(e) {
    message('Could not query rsession processes via ps: ', conditionMessage(e))
    NULL
  })
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

print(sessionInfo())


# Load the prepped objects; `utterance_level` used below comes from this file.
load('./data/prepped/finalData.RData')


# Summary stats.
# Utterance-level summary table (SI Table 1). `Fed` labels rows whose
# opensecretsID contains a Fed chair's surname; the first matching pattern
# wins and all other rows are NA.
# The former `lagFed` column was removed: it was computed but then dropped by
# select(), so it never reached the table.
forSum <- utterance_level %>%
  mutate(Fed = case_when(
    grepl('YELLEN',opensecretsID) ~ 'Janet Yellen',
    grepl('GREENSPAN',opensecretsID) ~ 'Alan Greenspan',
    grepl('BERNANKE',opensecretsID) ~ 'Ben Bernanke',
    grepl('POWELL',opensecretsID) ~ 'Jerome Powell',
    TRUE ~ NA_character_
  )) %>%
  select(`Interrupting Utterance` = interruptor,`Interrupted Utterance` = interrupted,
         `Total Utterances` = tot_utterances,`Uttrnc Length (# Chars)` = nchars,
         Year = year,Chamber = chamber,Position = position,`Fed Chair` = Fed,Party = party,Gender = gender,
         `Constrain FED` = constrain_empower_tot,`Oversight FED` = oversight_indep_tot,`Vote for Yellen` = yellen_vote,
         `# Children` = nKids,`# Daughters` = nDaughters,`# Sons` = nSons,Age = age,Seniority = seniority,
         `Vote Share` = votepct,`Ideology (DW-NOM)` = nominate_dim1)

# Write the table as LaTeX.
st(forSum,title = 'Summary Statistics: Utterance-Level',
   out = 'latex',file = './output/tables/SI_table_1.tex')

# Speaker-level summary table (SI Table 2): keep the first row for each
# opensecretsID x Chamber combination and summarise those rows.
# The former mutate() that built `Fed` and `lagFed` was removed: neither column
# is selected below, so it had no effect on the table.
utterance_level %>%
  select(opensecretsID,
         Year = year,Chamber = chamber,Party = party,Gender = gender,
         `Constrain FED` = constrain_empower_tot,`Oversight FED` = oversight_indep_tot,`Vote for Yellen` = yellen_vote,
         `# Children` = nKids,`# Daughters` = nDaughters,`# Sons` = nSons,Age = age,Seniority = seniority,
         `Vote Share` = votepct,`Ideology` = nominate_dim1) %>%
  group_by(opensecretsID,Chamber) %>%
  slice(1) %>%
  # Drop the grouping so a plain (ungrouped) table is handed to st().
  ungroup() %>%
  st(out = 'latex',
     file = './output/tables/SI_table_2.tex',
     title = 'Summary Statistics: Speaker-Level')

# Loading data
# Read the hearing text, discard any column whose name matches the
# '...1' / 'X1' pattern, and then rebuild X1 as the current row number.
hearings <- read_csv('./data/prepped/hearings/cleaned_docs.csv')
hearings <- select(hearings, -matches('\\.\\.\\.1|X1'))
hearings <- mutate(hearings, X1 = row_number())



# SI Figure 2: histogram of utterance length (characters, log10 x-axis) after
# stripping a leading "Mr./Mrs./Ms. <name>." prefix from each utterance.
# NOTE(review): the prefix pattern here does not include 'Chairman', while the
# one used for SI Figure 3 does -- left unchanged to keep the figure as is;
# confirm whether the difference is intended.
fig2 <- hearings %>%
  mutate(text = trimws(gsub('^(Mr\\.|Mrs\\.|Ms\\.) .*?\\.','',text)),
         nchars = nchar(text)) %>%
  ggplot(aes(x = nchars)) +
  # bins = 30 is the value geom_histogram() falls back to; stating it keeps
  # the same picture without the "pick better value" message in the log.
  geom_histogram(alpha = .5, bins = 30) +
  scale_x_log10() + xlab('Number of characters per utterance (logged)') +
  ylab('') +
  theme_ridges()

# The plot is built before the device is opened and then print()ed explicitly:
# a ggplot object is only drawn when printed, and top-level auto-printing does
# not happen under source(), which would leave the PDF without a page.
pdf('./output/figures/SI_figure_2.pdf',width = 7,height = 5)
print(fig2)
dev.off()


# Over hearings
# Plot data for the single hearing 'fed2017-02-15.txt'.
#   nchars : utterance length after stripping the leading speaker prefix
#   seqInd : position of the utterance within this hearing
#   hl1    : 1 for the rows with X1 in 18161:18167 (the highlighted exchange)
#   label  : for hl1 == 1 rows, all highlighted "speaker: text" lines joined
#            by newlines; NA otherwise
# Filtering to the hearing first avoids cleaning the text of every other
# hearing; seqInd is unchanged because row_number() now counts within the
# filtered rows, which is what the per-docID grouping did before.
toplot <- hearings %>%
  filter(docID == 'fed2017-02-15.txt') %>%
  mutate(text = trimws(gsub('^(Mr\\.|Mrs\\.|Ms\\.|Chairman) .*?\\.','',text)),
         nchars = nchar(text),
         seqInd = row_number(),
         hl1 = ifelse(X1 %in% 18161:18167,1,0)) %>%
  # Grouping by hl1 makes paste(..., collapse) run over the highlighted rows
  # only.
  group_by(hl1) %>%
  mutate(label = ifelse(hl1 == 1,paste(paste0('    ',speaker,': ',gsub('--\\\r\\\r\\\n--','----',text)),collapse = '\n'),NA)) %>%
  ungroup()

# SI Figure 3: utterance length by sequence position for the hearing held in
# `toplot`, coloured by speaker position, with the highlighted exchange shown
# in a label box and an arrow pointing at it.
fig3 <- toplot %>%
  ggplot(aes(x = seqInd,y = nchars,fill = position,label = label)) +
  geom_bar(stat = 'identity') +
  theme_ridges() + xlab('Utterance Sequence') + ylab('Number of characters per utterance') +
  scale_fill_manual(name = '',values = c('red','gold','grey60')) +
  theme(legend.position = 'bottom') +
  labs(title = 'Hearing before the House Committee on Financial Services',
       subtitle ='February 15th, 2017') +
  # annotate() draws the arrow once. geom_curve(aes(<constants>)) recycled the
  # constants over every row of the data and stacked one identical curve per
  # utterance on top of each other.
  annotate('curve',
           x = 50, y = 6500, xend = 23, yend = 600,
           colour = "black",
           size=.5,
           curvature = .2,
           arrow = arrow(length = unit(0.03, "npc"))) +
  # Only the first highlighted row is used so the label box is drawn once.
  geom_label(data = toplot %>% filter(X1 == 18161),hjust = 0,vjust = 0,size =2.5,nudge_y = 3000,nudge_x = 30,fill = 'white')

# Explicit print(): a ggplot object is only drawn when printed, and top-level
# auto-printing does not happen under source().
pdf('./output/figures/SI_figure_3.pdf',width = 7,height = 6)
print(fig3)
dev.off()

# EOF